;;########################################################################
;; statfunc.lsp
;; 1) new statistical functions
;; 2) new and modified functions by Pedro Valero to deal with missing data
;; 3) chi-square, binomial and Cochran-Mantel-Haenszel statistics
;; Copyright (c) 1991-99 by Forrest W. Young
;;########################################################################

;Secondary Least Squares Monotonic Transformation

(defun lsmt (ordinal interval)
"Args: ORDINAL INTERVAL
Computes the least squares monotonic transformation of ORDINAL given INTERVAL.
ORDINAL  is a vector or list of numbers of an ordinal  variable.
INTERVAL is a vector or list of numbers of an interval variable.
ORDINAL and INTERVAL must have the same number of elements.
Returns a vector or list of the numbers which are (weakly) in the same order
as ORDINAL and which are a least squares fit to INTERVAL.
The secondary approach to ties is used (ties remain tied).
Written by Forrest W. Young 10/26/94"
(when (/= (length ordinal) (length interval))
        (error "The ordinal and interval variables must be the same length."))
  (let* ((n (length ordinal))
         (rank-order (order ordinal))
         (X (select interval rank-order))
         (Y (select ordinal  rank-order))
         (block-mean nil)
         (block-size nil)
         (tie-mean nil)
         (j nil)
         (lower nil)
         )
;force tied data to remain tied
    (when (/= n (length (remove-duplicates ordinal)))
          (dolist (i (iseq 0 (- n 2)))
                  (when (and (= (select Y i) (select Y (1+ i)))
                             (not lower))
                        (setf lower i))
                  (when (and lower
                             (or (< (select Y i) (select Y (1+ i)))
                                 (= i (- n 2))))
                        (setf tie-mean 0)
                        (when (= i (- n 2)) (setf i (- n 1)))
                        (dolist (j (iseq lower i))
                                (setf tie-mean (+ tie-mean (select X j))))
                        (setf tie-mean (/ tie-mean (- (1+ i) lower)))
                        (dolist (j (iseq lower i))
                                (setf (select X j) tie-mean))
                        (setf lower nil))))
    (setf Y (copy-list X))
;compute the least squares monotonic transformation
    (dolist (i (iseq 1 (1- n)))
            (setf block-size 1)
            (setf block-mean (select x i))
            (loop
             (setf j (- i block-size))
             (when (or (< j 0) (>= block-mean (select y j))) (return))
             (setf block-size (1+ block-size))
             (setf block-mean  (/ (+ (* (1- block-size) block-mean)
                                     (select x j)) block-size)))
            (dolist (k (iseq (1+ j) i)) (setf (select Y k) block-mean)))
    (select Y (order rank-order))))

(defun list-all-statistics () (summarize-data))

(defun list-moments-quartiles-and-ranges ()
  (summarize-data :data nil :moments t :quartiles t :ranges t
                  :correlations t :covariances t frequencies nil))

(defun standardize (sequence)
"Arg: SEQUENCE
(- sequence (mean sequence)) / (standard-deviation sequence)"
  (/ (- sequence (mean sequence)) (standard-deviation sequence)))

(defun select-matrix (array i)
"Args: ARRAY I
Selects matrix I from ARRAY (array-rank 3)."
  (when (not (arrayp array)) (error "Not an Array"))
  (when (/= (array-rank array) 3) (error "Array-rank must be 3."))
     (let* ((sizes (array-dimensions array))
            (matrix-sizes (list (first sizes) (second sizes)))
            (higher-sizes (rest sizes))
            (higher-sizes-sequences (mapcar #'iseq higher-sizes))
            )
     (matrix matrix-sizes
           (combine (apply #'select array i higher-sizes-sequences)))))

(defun row-list2 (matrix &key (list nil))
"Args: MATRIX &KEY (LIST NIL)
Row-list function which can optionally produce its output as a list of lists."
  (if list
      (mapcar #'(lambda (x) (coerce x 'list)) (row-list matrix))
      (row-list matrix) ) )

(defun column-list2 (matrix &key (list nil))
"Args: MATRIX &KEY (LIST NIL)
Column-list function which can optionally produce its output as a list of lists."
  (if list
      (mapcar #'(lambda (x) (coerce x 'list)) (column-list matrix))
      (column-list matrix) ) )

(defun bind-rows2 (items &rest args)
"Args: ITEMS &REST ARGS
Bind-rows function which can optionally operate on lists-of-items, as well as separate lists of items."
  (let (
        (items2 (if (some #'compound-data-p items)
                    (apply #'bind-rows items)
                    (bind-rows items)) )
       )
    (cond ( (null args) items2 )
          ( (some #'compound-data-p args)
            (bind-rows items2 (apply #'bind-rows2 args)) )
          ( t (bind-rows items2 args) ) ) ) )

(defun bind-columns2 (items &rest args)
"Args: ITEMS &REST ARGS
Bind-columns function which can optionally operate on lists-of-items, as well as separate lists of items."
  (let (
        (items2 (if (some #'compound-data-p items)
                    (apply #'bind-columns items)
                    (bind-columns items)) )
       )
    (cond ( (null args) items2 )
          ( (some #'compound-data-p args)
            (bind-columns items2 (apply #'bind-columns2 args)) )
          ( t (bind-columns items2 args) ) ) ) )

(defun row (a i &key (list nil))
"Args: (matrix row-number)
 Takes a matrix and returns the row specified by row-number. If the keyword
 argument :LIST is set to t, the row is returned as a list."
  (select (row-list2 a :list list) i) )

(defun col (a i &key (list nil))
 "Args: (matrix column-number)
 Takes a matrix and returns the column specified by column-number. If the
 keyword argument :LIST is set to t, the row is returned as a list."
  (select (column-list2 a :list list) i))

(defun lists-to-matrix (list-of-lists)
 "Args: (list-of-list)
 Takes a list of lists and returns a matrix with columns equal to the lists."
  (apply #'bind-columns list-of-lists)  )

;(defun fuzz (arg &optional (num-decimal-places 2))
;"Args: (arg &optional num-decimal-places)
; Fuzzes the precision of any type of numeric argument to at most ;num-decimal-places decimal places.  Default num-decimal-places = 2."
;  (let ((factor (^ 10 num-decimal-places)))
;    (cond
;      ((stringp arg) (select arg (iseq num-decimal-places)))
;      (t (/ (round (* arg factor)) factor)))))

;fwy 9-27-02
(defun fuzz (arg &optional (num-decimal-places 2) (k 1))
"Args: (arg &optional num-decimal-places)
 Fuzzes the precision of any type of numeric argument to at most num-decimal-places decimal places.  Default num-decimal-places = 2."
  (cond
    ((numberp arg)
     (let* ((factor (^ 10 num-decimal-places))
            (fuzzed (/ (round (* arg factor)) factor))
            (formatted (format nil "~d" fuzzed))
            (decimal-loc (position #\. formatted :test #'equal))
            (num-decimals (if decimal-loc (- (length formatted) decimal-loc 1) 0))
            (last-digit (number-from-string (string (select (reverse formatted) 0))))
            (substr)
            )
       ;(print (LIST arg fuzzed formatted decimal-loc num-decimals last-digit))
       (cond
         ((> num-decimals num-decimal-places)
          (cond
            ((> k 3)
             (setf substr (select formatted (iseq (+ decimal-loc 3 num-decimal-places))))
             (number-from-string substr))
            (t
             (fuzz arg (1+ num-decimal-places) (1+ k)))))
         (t fuzzed))))
    (t
     (let ((factor (^ 10 num-decimal-places)))
       (/ (round (* arg factor)) factor)))))


(defun vector-origins (graphical-object
                        &optional (var-indices
                                   (iseq (send graphical-object
                                                       :num-variables))) )
"A function which can be used to find the origin wrt to any (sub)set of variables in any graph. Produces a list of centers for the specified variables. Primarily useful for adding vectors to a plot."
  (split-list (send graphical-object :center var-indices) 1) )

(defun /0 (numer-seq denom-seq &optional (result 0))
"Args: NUMERATOR DENOMINATOR RESULT
Protected division function: Divides NUMERATOR by DENOMINATOR trapping impending divide by zero. First two arguments may be a number or a sequence, third argument must be a number. Returns a number or a sequence of numbers, each number being the result of the division, or, when trapped, RESULT. For use only in mosaic plot. Written by Ernest Kwan"
  (let ((flag (if (find '0 denom-seq) t nil))
        (result
         (if (find '0 denom-seq)
             (mapcar #'(lambda (numer denom)
                         (if (= 0 denom)
                             (repeat result (length numer))
                             (/ numer denom)))
                     (coerce numer-seq 'list) (coerce denom-seq 'list))
             (/ numer-seq denom-seq))))
    (list result flag)))


(defun size (matrix-a)
"Args: MATRIX
Alias of ARRAY-DIMENSIONS"
  (array-dimensions matrix-a))

(defun nscores (x)
"Args: (x)
Returns the normal scores of the elements of x."
  (let ((ranks (rank x))
        (n (length x)))
    (normal-quant (/ (1+ ranks) (1+ n)))))

(defun ssq (x)
"Args: (x)
Returns the sum of squares of the elements of x."
  (sum (^ x 2)))

(defun standard-deviation (x)
"Args: (x)
Returns the standard deviation of the elements x. Vector reducing.
Issues warning on zero standard deviation and stops. Return nil when one datum."
  (let* ((n (count-elements x))
         (r (- x (mean x)))
         (sd nil))
    (when (> n 1) (setf sd (sqrt (* (mean (* r r)) (/ n (- n 1)))))
          (when (= sd 0)
                (error-message "A variable has zero variance. Computations will continue, but may be unstable or inaccurate.")
                ;(toplevel)
                ))
    sd))

(defun variance (x)
  (let ((result (standard-deviation x)))
    (when result (setf result (^ result 2)))
    result))

(defun range (x)
  (let ((up-low (select (fivnum x) (list 4 0)))
        )
    (- (select up-low 0) (select up-low 1))))

(defun mid-range (x)
    (mean (select (fivnum x) (list 0 4))))

;;the following two formulas taken from SAS elementary stats procs p 11
;;they give the same results shown for SAS PROC UNIVARIATE

(defun skewness (x)
  (let ((std-cubed (^ (standard-deviation x) 3))
        (dev-cubed (^ (- x (mean x)) 3))
        (n (length x))
        (skew nil))
    (if (= 0 std-cubed) (error-message "Skewness Undefined.")
        (when (> n 2)
          (setf skew
                (* (/ n (* (- n 1) (- n 2))) (sum (/ dev-cubed std-cubed))))))
    ))

(defun kurtosis (x)
  (let ((std-fourth (^ (standard-deviation x) 4))
        (dev-fourth (^ (- x (mean x)) 4))
        (n (length x))
        (kurtosis nil))
    (if (= 0 std-fourth) (error-message "Kurtosis Undefined.")
        (when (> n 3)
          (setf kurtosis (- (* (/ (* n (+ n 1)) (* (- n 1) (- n 2) (- n 3)))
                         (sum (/ dev-fourth std-fourth)))
                         (/ (* 3 (- n 1) (- n 1)) (* (- n 2) (- n 3)))))))
    ))


(defun var-type (var-name)
"Args: VAR-NAME
Finds the variable-type of VAR-NAME, which is assumed to be in the current-data"
     (let ((position
            (position var-name
                      (map-elements #'string-downcase (send $ :variables))
                      :test #'equal)))
       (when position (select (send $ :types) position))))

(defun correlation (x y)
"Args: X Y
Takes two vectors or lists of the same length and returns their correlation."
  (let* ((recip-nm1 (/ 1 (1- (length x))))
         (meanx (mean x))
         (meany (mean y))
         (centeredx (coerce (- x meanx) 'vector ))
         (centeredy (coerce (- y meany) 'vector )))
    (/ (* (sum (* centeredx centeredy)) recip-nm1)
       (* (sqrt (* (sum (* centeredx centeredx)) recip-nm1))
          (sqrt (* (sum (* centeredy centeredy)) recip-nm1))))))



(defun correlation-matrix (matrix &key types)
"Args: (matrix &key types)
 Takes a nxm multivariate matrix and returns an nxn correlation matrix. Types is
an m-vector of variable type strings (category, ordinal, numeric). Uses all n
of the input matrix's variables if :types not specified, only the numeric variables if :types is specifed."
  (let ((it matrix))
    (when types
          (setf it (select matrix (iseq (select (size matrix) 0))
                           ($position '("numeric") types))))
    (setf it (normalize (center it) 1))
    (/ (matmult (transpose it) it) (- (select (array-dimensions it) 0) 1))))

(defun sv-decomp2 (matrix)
  (let* (
         (rc (array-dimensions matrix) )
         (r (select rc 0) )
         (c (select rc 1) )
        )
    (if (< r c)
        (let (
              (svd (sv-decomp (transpose matrix)) )
             )
          (list (select svd 2) (select svd 1) (select svd 0) (select svd 3)))
        (sv-decomp matrix) ) ) )

(defun pca (data &key (corr t) (left-alpha 1))
"Args: DATA &key (CORR t)
Performs a principal components analysis of DATA. Computes Covariances if CORR is nil. Returns a list of three things: A matrix of scores, a vector of eigenvalues & a matrix of coefficients."
  (let* (
         (prepped-data (if corr
                           (/ (normalize (center data) 1)
                              (sqrt (1- (select (array-dimensions data) 0))) )
                           (/ (center data)
                              (sqrt (1- (select (array-dimensions data) 0))) )
                           ) )
         (svd (sv-decomp2 prepped-data) )
         )
    (when (< (sum (col (select svd 2) 0)) 0)
        (setf (select svd 0) (* (- 1) (select svd 0)) )
        (setf (select svd 2) (* (- 1) (select svd 2)) ) )
    (list (%* (select  svd 0) (diagonal (^ (select svd 1) left-alpha)))
        (^ (select svd 1) 2)
        (%* (select  svd 2) (diagonal (^ (select svd 1) (1- left-alpha)))))))

(defun center (data)
"Args: DATA
Centers the columns of the matrix DATA. Returns a matrix."
  (apply #'bind-columns (map-elements #'function-with-missing  #'(lambda (x) (- x (mean x))) (column-list data)) ) )
;; modified by PV to deal with missing data 30.7.98

(defun normalize (data &optional (std-dev 1))
"Args: DATA-MATRIX &OPTIONAL STD-DEV
Normalize the scores on each variable to a mean of 0 and a standard deviation of 1 or any other desired value.  (Actually, SQRT(N-1) times DESIRED-STDEV.)"
  (apply #'bind-columns
         (map-elements #'function-with-missing
                       #'(lambda (x)
                           (* std-dev (/ x (standard-deviation (non-missing x)))))
                       (column-list (center data))) ) )
;; modified by PV to deal with missing data 30.7.98

(defun orthogonal-procrustean-rotation (matrix target)
"Finds the orthogonal rotation matrix, which, when applied to MATRIX, will rotate MATRIX into as close a congruence with TARGET as possible."
  (let (
        (svd (sv-decomp2 (%* (transpose matrix) target)) )
        )
    (%* (select svd 0) (transpose (select svd 2))) ) )

;;Generalized Singular Value Decomposition

(defun gsv-decomp (a w q)
"Args: (A W Q)
A is a real matrix, W and Q are positive definite diagonal matrices.
Computes the generalized singular value decomposition of A such that A = NUM'
where N'WN = M'QM = I. Returns a four-element list of the form (N U M FLAG)
where N and M are matrices whose columns are the left and right generalized
singular vectors of A and U is the sequence of generalized singular values of
A. FLAG is T if the algorithm converged, NIL otherwise."
  (cond
    ((>= (select (size a) 0) (select (size a) 1))    ;nrows >= ncols
     (let* ((b (matmult (sqrt w) a (sqrt q)))
            (svd (sv-decomp b))
            )
       (list (matmult (sqrt (inverse w)) (select svd 0))
             (select svd 1)
             (matmult (sqrt (inverse q)) (select svd 2))
             (select svd 3))))
    (t                                               ;nrows < ncols
     (let* ((b (matmult (sqrt w) a (sqrt q)))
            (svd (sv-decomp (transpose b)))
            )
       (list (matmult (sqrt (inverse w)) (select svd 2))
             (select svd 1)
             (matmult (sqrt (inverse q)) (select svd 0))
             (select svd 3))))))

(defun distance-matrix (matrix)
"Args: (matrix)
Takes a nxm multivariate matrix and returns a euclidean distance matrix."
  (let* ((cpd (matmult matrix (transpose matrix)))
         (ssq (diagonal cpd))
         (n (length ssq))
         )
    (sqrt (+ (outer-product ssq (repeat 1 n))
             (* -2 cpd)
             (outer-product (repeat 1 n) ssq)))))

(defun gs-orthog (matrix
                  &key (column-order (iseq
                                      (select (array-dimensions matrix) 1)))
                       (unpermute 'nil)
                       (norm 'nil))
"Method Args: (MATRIX &key (COLUMN-ORDER (iseq num-columns-in-matrix)) (UNPERMUTE 'nil) (NORM 'nil))
Performs a Gram-Schmidt Orthogonalization on the columns of MATRIX.
XT = X* ;   T = (inverse R) {from QDR decomp};   (X*'X*) = D or I depending on whether orthog. or orthonorm. was performed. Returns a list of lists containing: COLUMN-ORDER (order in which columns of X were orthogonalized), X*, T. If the keyword argument :COLUMN-ORDER is provided and the keyword argument :UNPERMUTE is given a 'nil value (the default), then the GS-Orthogonalization will be performed on the matrix whose columns are appropriately permuted. (XP)T = (X*P). In this case, the result list will contain the results from the !!!permuted!!! form of MATRIX: [COLUMN-ORDER, (X*P), & T]. If, in this same case, the keyword argument :UNPERMUTE is supplied a value of 't, the solution will be determined as above, but the final results will un-permute the X and X* matrices and appropriately permute the transformation such that the GS equations hold as: X [inverse(TP') P'] = X*. Here, the result list contains the original matrix X, the transformation matrix which orthogonalizes the columns in the proper order
[inverse(TP') P'], and the un-permuted transformed data matrix, X* (which contains columns orthogonalized in COLUMN-ORDER but left in their original matrix positions). If the keyword argument  :NORM T  is provided, then a GS-OrthoNORMALization will be performed instead of an orthogonalization."

  (if (/= (length (remove-duplicates column-order))
          (select (array-dimensions matrix) 1))
      (error "Insufficient number of elements specified in :COLUMN-ORDER sequence."))

  (let* (
         (permuted-matrix (permute-matrix matrix column-order))
         (sol (if norm
                  (qr-decomp2 permuted-matrix :pivot nil)
                  (qdr-decomp permuted-matrix :pivot nil)) )
         )
    (if unpermute

        (let (
              (unpermutation (mapcar #'(lambda (x) (position x column-order))
                                   (iseq (length column-order))))
              )
          (if norm
              (list column-order
                    (permute-matrix (select sol 0) unpermutation)
                    (permute-matrix (inverse (permute-matrix
                                              (select sol 1)
                                              unpermutation))
                                    unpermutation) )
              (list column-order
                    (permute-matrix (%* (select sol 0)
                                        (diagonal (select sol 1)))
                                    unpermutation)
                    (permute-matrix (inverse (permute-matrix
                                              (select sol 2)
                                              unpermutation))
                                    unpermutation) ) ))
        (if norm
            (list column-order
                  (select sol 0)
                  (inverse (select sol 1)))
            (list column-order
                  (%* (select sol 0) (diagonal (select sol 1)))
                  (inverse (select sol 2)))) ) ))

(defun permute-matrix (matrix new-order &key (col t) (row nil))
"Args: (MATRIX NEW-ORDER &key (COL t) (ROW nil))
Permutes the columns (or rows) of MATRIX into the order specified by NEW-ORDER.  By default the columns are permuted.  If the keyword argument COL is 'nil or ROW is 't, the rows of the MATRIX will be permuted."
  (let (
        (ad (array-dimensions matrix))
        (new-order (remove-duplicates new-order))
        )

    (if (or row (null col))
        (if (/= (length new-order) (select ad 0))
            (error "Incorrect length of new order list.")
            (bind-rows2 (select (row-list2 matrix :list t) new-order)))

        (if (/= (length new-order) (select ad 1))
            (error "Incorrect length of new order list.")
            (bind-columns2 (select (column-list matrix) new-order))) ) ))


(defun qdr-decomp (matrix &key (pivot nil))
"Args: (MATRIX &key (PIVOT 'nil)
A modification to QR-DECOMP2 which performs the factorization:X = QDR; Q'Q = I where D = (diagonal of original R); R = original R with its diagonal removed = (inverse D) %* (original R). Returns a list of lists containing: Q D R & (if PIVOT, then column-order). MATRIX is a matrix of real numbers with at least as many rows as columns. Computes the QR factorization of A and returns the result in a list of the form (Q D R).  If PIVOT is true the columns of X are first permuted to insure that the absolute values of the diagonal elements of R are nonincreasing.  In this case the result includes a third element, a list of the indices of the columns in the order in which they were used.  Also (from QR-DECOMP2) the diagonal values of R are all now guaranteed to be positive and the parameter PIVOT has been changed to a keyword parameter."
  (let* (
         (qr (qr-decomp2 matrix :pivot pivot) )
         (d (diagonal (select qr 1)) )
         (r* (%* (diagonal (/ 1 d)) (select qr 1)) )
         )
    (list (select qr 0) d r* (if pivot (select qr 2))) ))

(defun qr-decomp2 (matrix &key (pivot nil))
"Args: (MATRIX &key (PIVOT 'nil)
A slight modification to QR-DECOMP which insures that the diagonal values of R are all positive.  Also the parameter, PIVOT, has been changed from an optional parameter (in the original QR-DECOMP function) to a keyword parameter. MATRIX is a matrix of real numbers with at least as many rows as columns. Computes the QR factorization of A and returns the result in a list of the form (Q R).  If PIVOT is true the columns of X are first permuted to insure that the absolute values of the diagonal elements of R are nonincreasing.  In this case the result includes a third element, a list of the indices of the columns in the order in which they were used."
  (let* (
         (qr (qr-decomp matrix pivot) )
         (d (diagonal (select qr 1)) )
         (ones (repeat 1 (select (array-dimensions matrix) 1)) )
         (p (mapcar #'(lambda (i) (setf (select ones i) -1))
                    (which (minusp d))) )
         )
    (list (%* (select qr 0) (diagonal ones))
          (%* (diagonal ones) (select qr 1))
          (if pivot (select qr 2))) ))


;;functions by PV to deal with missing data 30.7.98

(defun non-missing (var)
"Args: VAR. Takes a var with missing values and gives the var without them"
  (select var (which (map-elements 'not (map-elements 'equal nil var)))))

(defun id-non-missing (var)
"Args: VAR. Takes a var with missing values and gives the position of  them "
  (which (map-elements 'not (map-elements 'equal nil var))))

(defun function-with-missing (f data &optional args)
  "Args: F is a function that returns a list or a vector as a result.
DATA a list or a vector.
&optional args. A list with optional args for the function F. Returns the results of function for the non-nil elements of data. NIL elements are returned untouched. Example (function-with-missing (#'log data (list 10)) will return log base 10 for data."
    (let ( (data (copy-list data))
           (args args)
           (f f))
      (setf (select data (id-non-missing data)) (apply f (non-missing data) args))
      data))


;----------------------------
;chi-square related functions
;----------------------------

(defun chisq-expected (x)
"Arg: X
Calculates and returns a matrix of expected values for matrix X based on simple chi-square additivity model"
  (let* ((sizes (array-dimensions x))
         (nrows (first sizes))
         (ncols (second sizes)))
    (cond
      ((= 1 nrows)
       (matrix (list 1 ncols) (* (repeat 1 ncols) (/ (sum x) ncols))))
      ((= 1 ncols)
       (matrix (list nrows 1) (* (repeat 1 nrows) (/ (sum x) nrows))))
      (t
       (if (> (sum x) 0)
           (/ (matmult (bind-columns (mapcar #'sum (row-list x)))
                       (bind-rows (mapcar #'sum (column-list x))))
              (sum x))
           nil)))))


(defun chisq (x)
"Arg: X
Calculates Pearson's Chi-Square for matrix X.
Returns nil if expected values contain zero, chisq otherwise."
  ;(flet ((chisq-expected (x)
  ;        (if (> (sum x) 0)
  ;            (/ (matmult (bind-columns (mapcar #'sum (row-list x)))
  ;                        (bind-rows (mapcar #'sum (column-list x))))
  ;               (sum x))
  ;            nil))))

    (if (not (matrixp x))
        (error "Argument must be a matrix")
        (let* ((x-hat (chisq-expected x))
               (x-dif (if x-hat (- x x-hat)
                          nil)))
          (if (not x-hat)
              nil
              (cond
                ((which (> machine-epsilon (combine x-hat)))
                 nil)
                ((not x-dif)
                 nil )
                (t
                 (sum (/ (* x-dif x-dif) x-hat))))))))

(defun chisq-df (x)
"Arg: X
Calculates the degrees of freedom for Pearson's Chi-Square, given matrix X."
  (if (not (matrixp x))
      (error "Argument must be a matrix")
      (let ((rc (array-dimensions x)))
        (cond
          ((= (first rc) 1) (- (second rc) 1))
          ((= (second rc) 1)(- (first rc) 1))
          (t (* (1- (first rc)) (1- (second rc))))))))

(defun chisq-p-value (chisq chisq-df)
"Arg: CHISQ CHISQ-DF
Calculates the P-value of a CHISQ value which has CHISQ-DF degrees of freedom."
  (if (= chisq-df 0)
      (fatal-message "Zero degrees of freedom for Chi Square statistic.")
  (- 1 (chisq-cdf chisq chisq-df))))

(defun phi (x)
"Arg: X
Calculates the Phi-coefficient for matrix X."
  (if (not (matrixp x))
      (error "Argument must be a matrix")
      (let ((sumx (sum x)))
        (if (> sumx 0)
            (sqrt (/ (chisq x) (sum x)))
            ~~~))))


(defun binomial (x)
"Arg: X
Calculates the Binomial-coefficient for matrix X."
  (if (not (matrixp x))
      (error "Argument must be a matrix")
      (- 1 (normal-cdf (sqrt (chisq x))))))

; Functions to calculate the Cochran-Mantel-Haenszel
; statistic, for testing general association
; across n-way tables, n>2


(defun cmh (z)
"Args: Z, an n-way array
Calculates Cochran-Mantel-Haenszel Chi-square statistic CMH for testing general association among n 2-way tables."
;(terpri)
;(terpri)
     (let* ((matlist (array-list z (list 0 1)))
 ;           (dummy (print (list "matlist" matlist)))
            (sum-numerators
             (^ (sum (mapcar #'(lambda (mat)
 ;                                (print (list "mat" mat))
 ;                                (print (list "numer" (num-piece mat)))
                                 (num-piece mat))
                             matlist)) 2))
            (sum-denominators
             (sum (mapcar #'(lambda (mat)
 ;                             (print (list "denom" (vh mat)))
                              (vh mat)) matlist))))
 ;      (print (list "sum-numer" sum-numerators))
 ;      (print (list "sum-denom" sum-denominators))
 ;      (print (list "cmh=" (/ sum-numerators sum-denominators)))
       (sum (/ sum-numerators sum-denominators))))


(defun num-piece (x)
"CMH numerator"
  (let* ((exsq (chisq-expected x))
         (rows (array-dimension x 0))
         (cols (array-dimension x 1))
         (ocmh (select x (iseq 0 (- rows 2)) (iseq 0 (- cols 2))))
         (ecmh (select exsq (iseq 0 (- rows 2)) (iseq 0 (- cols 2))))
         )
    (- ocmh ecmh)))


(defun vh (x)
"CMH denominator"
 (/ (* (apply #'* (mapcar #'sum (row-list x)))
       (apply #'* (mapcar #'sum (column-list x))))
    (*(^ (sum x) 2) (- (sum x) 1))))

#|
(defun num-piece (x)
"CMH numerator"
  (let* ((ocmh (select x (iseq 0 (- (array-dimension x 0) 2))
                       (iseq 0 (- (array-dimension x 1) 2))))
         (ecmh (select (chisq-expected x)
                       (iseq 0 (- (array-dimension (chisq-expected x) 0) 2))
                       (iseq 0 (- (array-dimension (chisq-expected x) 1) 2)))))
    (- ocmh ecmh)))
|#

(defun cmh-p (cmh df)
"Args: CMH DF
Returns the p-value for CMH, the Cochran-Mantel-Haenszel statistic, when there are DF degrees of freedom"
  (if (= df 0)
      (fatal-message "Zero degrees of freedom for CMH statistic.")
      (- 1 (chisq-cdf cmh df))))

(defun cmh-df (x)
"Args FreqMat
Calculates the df for Cochran-Mantel-Haenszel statistic, from the frequency matrix X"
:fwy???
 ; (* (- (array-dimension x 1) 1) (- (array-dimension x 2) 1))
   (* (- (array-dimension x 0) 1) (- (array-dimension x 1) 1)))



(defmacro symbol-to-string (symbol)
  (string-downcase (format nil "~s" symbol)))

(defun variablep (var-symbol)
     (or (numericp var-symbol) (categoryp var-symbol) (ordinalp var-symbol)))

(defmacro categoryp (var-symbol)
     (let* ((string (string-downcase (format nil "~s" var-symbol)))
            (var-type (string-downcase (var-type string))))
       (equal var-type "category")))

(defmacro numericp (var-symbol)
     (let* ((string (string-downcase (format nil "~s" var-symbol)))
            (var-type (string-downcase (var-type string))))
       (equal var-type "numeric")))

(defmacro ordinalp (var-symbol)
     (let* ((string (string-downcase (format nil "~s" var-symbol)))
            (var-type (string-downcase (var-type string))))
       (equal var-type "ordinal")))

(defun categorize-numeric-variable (numeric-var category-var)
"Args: Numeric Category
Groups the values in NUMERIC according to CATEGORY classes"
  (mapcar #'(lambda (class)
              (select numeric-var (which (map-elements #'equal class category-var))))
          (remove-duplicates category-var :test #'equal)))

;;; Functions to generate a random sample from a multivariate data object
;;;(c) Rubn Ledesma 2003

(defun random-sample ()
  (setf text-n (send edit-text-item-proto :new "" :text-length 11))
  (setf OK (send modal-button-proto :new "Ok"
                 :action
               #'(lambda ()
                   (let (
                         (dialog (send ok :dialog))
                        (n (read-from-string (send text-n :text)))
                         )
                        (cond ((< (send current-data :nobs) n)
                                (fatal-message "Choose a sample smaller than your data"))
                                (t
                                     (funsample n )))))))


  (setf cancel (send modal-button-proto :new "Cancel"
                     :action
               #'(lambda ()
                   (let (
                         (dialog (send cancel :dialog))
                         )
                     (send dialog :modal-dialog-return nil)))))
  (setf dialog
      (send modal-dialog-proto :new
            (list
             (list "RANDOM SAMPLE")
             (list "Sample size" text-n)
             (list ok cancel))
   :default-button ok
            ))
(setf result (send  dialog :modal-dialog))
             result)

(defun funsample (n)
  (setf index  (sort-data (select (rank (UNIFORM-RAND (send current-data :nobs))) (iseq n))))
(data "sample"
      :variables (send current-data :active-variables '(all))
      :types (send current-data :active-types '(all))
      :labels (select (send current-data :labels) index)
      :created  (send *desktop* :selected-icon)
      :data
      (combine
       (select (row-list (send current-data :data-matrix)) index))))


(provide "statfunc")